# I hate homework
library(ggplot2)
library(network)
## network: Classes for Relational Data
## Version 1.13.0.1 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
library(stringr)
library(visNetwork)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 1.4.2 ✔ purrr 0.2.4
## ✔ tidyr 0.8.0 ✔ dplyr 0.7.4
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(jsonlite) # read in the JSON data from the API
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
library(dplyr) # data munging
library(igraph) # work with graphs in R
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:network':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(ggnetwork) # devtools::install_github("briatte/ggnetwork")
library(intergraph) # ggnetwork needs this to wield igraph things
library(ggrepel) # fancy, non-ovelapping labels
library(svgPanZoom) # zoom, zoom
library(DT) # pretty tables
library(ggnet)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(SnowballC)
library(tidytext)
library(stringr)
library(quanteda)
## Package version: 1.1.1
## Parallel computing: 2 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following objects are masked from 'package:tm':
##
## as.DocumentTermMatrix, stopwords
## The following object is masked from 'package:utils':
##
## View
library(stringi)
library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:igraph':
##
## %--%
## The following object is masked from 'package:base':
##
## date
# Input tables: the senator metadata and the source/target follow table.
sen_twt <- read.csv("/Users/Sumin/Desktop/R/senators_twitter.csv")
sen_follow <- read.csv("/Users/Sumin/Desktop/R/senators_follow.csv")

# `link` is the working copy of the follow table used by the steps below.
link <- sen_follow

# For every `source` account, add up its `following` flags and rank the
# accounts from the largest total to the smallest.
link_flw <- link %>%
  group_by(source) %>%
  summarise(sum_flw = sum(following)) %>%
  arrange(desc(sum_flw))

# Accounts with the highest `following` totals (head() prints six rows).
head(link_flw)
## # A tibble: 6 x 2
## source sum_flw
## <fct> <int>
## 1 SenatorCollins 82
## 2 SenJohnMcCain 78
## 3 lisamurkowski 76
## 4 SenatorBurr 72
## 5 SenJohnBarrasso 72
## 6 RoyBlunt 71
# For every `source` account, add up its `followed_by` flags and rank the
# accounts from the largest total to the smallest.
link_flwed <- link %>%
  group_by(source) %>%
  summarise(sum_flwed = sum(followed_by)) %>%
  arrange(desc(sum_flwed))

# Accounts with the highest `followed_by` totals (head() prints six rows).
head(link_flwed)
## # A tibble: 6 x 2
## source sum_flwed
## <fct> <int>
## 1 SenJeffMerkley 96
## 2 MarkWarner 95
## 3 SenJohnKennedy 95
## 4 SenMarkey 95
## 5 SenatorEnzi 94
## 6 MikeCrapo 93
#link_all <- full_join(link_flw,link_flwed, by = "source")
#link_all <- link_all %>%
# mutate(sum = sum_flw + sum_flwed) %>%
# arrange(desc(sum))
## Plot the network
# Edge list: keep only the source -> target pairs whose `following` flag is TRUE.
melted_link <- link %>%
  dplyr::filter(following == TRUE) %>%
  dplyr::select(source, target)

# Directed graph built from the edge list (spelled TRUE, since `T` can be
# reassigned).
melted_graph <- graph_from_data_frame(d = melted_link, directed = TRUE)

# Store each vertex's degree as its `size` attribute. centr_degree() is the
# current name of the deprecated centralization.degree(); `$res` holds the
# per-vertex scores.
V(melted_graph)$size <- centr_degree(melted_graph)$res

# Fix the RNG so the Fruchterman-Reingold layout is reproducible.
set.seed(1234)
link1_df <- ggnetwork(melted_graph, layout = "fruchtermanreingold", arrow.gap = 0, cell.jitter = 0)
## Loading required package: sna
## Loading required package: statnet.common
##
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
##
## order
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
##
## Attaching package: 'sna'
## The following objects are masked from 'package:igraph':
##
## betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
# Pass the four layout coordinate columns through as.vector() in one step.
link1_df[c("x", "y", "xend", "yend")] <-
  lapply(link1_df[c("x", "y", "xend", "yend")], as.vector)

# Attach the senator metadata by Twitter handle, then drop the metadata
# columns that the plots do not use.
link1_df <- inner_join(
  link1_df,
  sen_twt,
  by = c("vertex.names" = "Official.Twitter")
)
link1_df <- dplyr::select(
  link1_df,
  -State, -Senator, -Staff.Twitter, -Campaign.Twitter, -label
)
## Warning: Column `vertex.names`/`Official.Twitter` joining factors with
## different levels, coercing to character vector
# Party -> colour lookup used by the party-coloured plots in this script.
color <- c("Democratic Party" = "#2b8cbe", "Republican Party" = "#de2d26", "Independent" = "#2ca25f")

# Follower network: grey curved arrows for edges, nodes coloured by party and
# sized from the vertex `size` attribute.
g1 <- ggplot() +
  geom_edges(
    data = link1_df,
    aes(x = x, y = y, xend = xend, yend = yend),
    color = "#a5a29f", curvature = 0.1, size = 0.15, alpha = 1 / 2,
    arrow = arrow(length = unit(12, "pt"))
  ) +
  # geom_nodes() only uses x/y; mapping xend/yend here merely produced an
  # "Ignoring unknown aesthetics" warning, so they are no longer mapped.
  geom_nodes(
    data = link1_df,
    aes(x = x, y = y, color = Party.affiliation, size = sqrt(size) / pi),
    alpha = .8
  ) +
  # Label only vertices with size > 150. Columns are selected by name instead
  # of by position so the subset survives a change in column order.
  geom_label_repel(
    data = unique(link1_df[link1_df$size > 150, c("x", "y", "vertex.names")]),
    aes(x = x, y = y, label = vertex.names),
    size = 2, color = "#111111"
  ) +
  scale_color_manual(values = color, name = "Party Affiliation") +
  theme_blank() +
  ggtitle("Network of Senators on Twitter: Followers") +
  labs(size = "Area of Centrality")
# Find communities in the follower graph with the walktrap algorithm and read
# off each vertex's community id.
wc <- cluster_walktrap(melted_graph)
members <- membership(wc)

# One row per account: its community id (as text, because it is used as a
# discrete colour key) and its handle. Both columns are built as character
# directly; creating factors first only led to a coercion warning in the join.
member_df <- data.frame(
  Group = as.character(as.vector(members)),
  twt = names(members),
  stringsAsFactors = FALSE
)

# Add the community id to the plotting data, matching on the handle.
link2_df <- link1_df %>%
  inner_join(member_df, by = c("vertex.names" = "twt"))
# Colour per cluster id. NOTE(review): only ids "1" and "2" are mapped, so this
# assumes the clustering returned exactly two groups - confirm.
color2 <- c("2" = "#2b8cbe", "1" = "#de2d26")

# Same network layout as the follower plot, with nodes coloured by cluster id.
g2 <- ggplot() +
  geom_edges(
    data = link2_df,
    aes(x = x, y = y, xend = xend, yend = yend),
    color = "#a5a29f", curvature = 0.1, size = 0.15, alpha = 1 / 2
  ) +
  # geom_nodes() only uses x/y; mapping xend/yend here merely produced an
  # "Ignoring unknown aesthetics" warning, so they are no longer mapped.
  geom_nodes(
    data = link2_df,
    aes(x = x, y = y, color = Group, size = sqrt(size) / pi),
    alpha = .8
  ) +
  # Label only vertices with size > 150. Columns are selected by name instead
  # of by position so the subset survives a change in column order.
  geom_label_repel(
    data = unique(link2_df[link2_df$size > 150, c("x", "y", "vertex.names")]),
    aes(x = x, y = y, label = vertex.names),
    size = 2, color = "#111111"
  ) +
  theme_blank() +
  scale_color_manual(values = color2, name = "Cluster Identification") +
  ggtitle("Network of Senators on Twitter: Cluster Identification") +
  labs(size = "Area of Centrality")
# Cross-plot of party affiliation against cluster id, coloured by cluster.
g2_1 <- ggplot(link2_df, aes(x = Party.affiliation, y = Group)) +
  geom_point(aes(color = Group)) +
  scale_color_manual(values = color2) +
  labs(
    title = "Party Affiliation vs. Cluster Identification",
    x = "Party Affiliation",
    y = "Cluster Identification"
  ) +
  theme_bw() +
  theme(legend.position = "right")

# Render the three network figures in order.
g1

g2

g2_1

# Load the tweet data set.
tweet <- readRDS("/Users/Sumin/Desktop/R/senator_tweets.RDS")

# Build a tm corpus from the `hashtags` column of every tweet.
q2_hashtags <- tweet[["hashtags"]]
hashtags_cp <- q2_hashtags %>%
  VectorSource() %>%
  Corpus()

# Seed the RNG so the word placement is reproducible, draw the cloud, then
# write the title onto the same plot.
set.seed(3)
wordcloud(
  hashtags_cp,
  max.words = 100,
  colors = c("#bfd3e6", "#9ebcda", "#8c96c6", "#8c6bb1", "#88419d", "#6e016b")
)
## Warning in wordcloud(hashtags_cp, max.words = 100, colors = c("#bfd3e6", :
## taxreform could not be fit on page. It will not be plotted.
## Warning in wordcloud(hashtags_cp, max.words = 100, colors = c("#bfd3e6", :
## trumpcare could not be fit on page. It will not be plotted.
text(x = 0.5, y = 1, labels = "Most Common Hashtag over Time for Senators")

# Clean the data: keep timestamp, author and hashtags, restrict to tweets from
# 2018 onwards, and attach each author's senator metadata.
tweet_new <- tweet %>%
  select(created_at, screen_name, hashtags)
tweet_new$created_at <- lubridate::ymd_hms(tweet_new$created_at)

# ymd_hms() returns UTC timestamps by default, so the cutoff is built in UTC
# too. Comparing against the bare string "2018-01-01" would parse it in the
# machine's local time zone and move the boundary by the local UTC offset.
tweet_new <- tweet_new %>%
  filter(created_at >= as.POSIXct("2018-01-01", tz = "UTC")) %>%
  inner_join(sen_twt, by = c("screen_name" = "Official.Twitter")) %>%
  dplyr::select(-State, -Senator, -Staff.Twitter, -Campaign.Twitter, -label)
## Warning: Column `screen_name`/`Official.Twitter` joining character vector
## and factor, coercing into character vector
# Split the 2018 tweets by the author's party.
tweet_d <- filter(tweet_new, Party.affiliation == "Democratic Party")
tweet_r <- filter(tweet_new, Party.affiliation == "Republican Party")

# One hashtag corpus per party.
tweet_dcp <- tweet_d$hashtags %>%
  VectorSource() %>%
  Corpus()
tweet_rcp <- tweet_r$hashtags %>%
  VectorSource() %>%
  Corpus()

# Democratic word cloud (seeded so the word placement is reproducible).
set.seed(3)
wordcloud(
  tweet_dcp,
  max.words = 100,
  colors = c("#d0d1e6", "#a6bddb", "#74a9cf", "#3690c0", "#0570b0", "#034e7b")
)
## Warning in wordcloud(tweet_dcp, max.words = 100, colors = c("#d0d1e6",
## "#a6bddb", : netneutrality could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(tweet_dcp, max.words = 100, colors = c("#d0d1e6",
## "#a6bddb", : protectdreamers could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(tweet_dcp, max.words = 100, colors = c("#d0d1e6",
## "#a6bddb", : trumpbudget could not be fit on page. It will not be plotted.
## Warning in wordcloud(tweet_dcp, max.words = 100, colors = c("#d0d1e6",
## "#a6bddb", : infrastructure could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(tweet_dcp, max.words = 100, colors = c("#d0d1e6",
## "#a6bddb", : dreamactnow could not be fit on page. It will not be plotted.
# Title for the Democratic word cloud drawn just above.
text(
  x = 0.5, y = 1,
  labels = "Most Common Hashtags from Democratic Senators in 2018"
)

# Republican word cloud (seeded so the word placement is reproducible),
# followed by its title.
set.seed(123)
wordcloud(
  words = tweet_rcp,
  max.words = 100,
  colors = c("#fc9272", "#fb6a4a", "#ef3b2c", "#cb181d", "#a50f15", "#67000d")
)
text(
  x = 0.5, y = 0.9,
  labels = "Most Common Hashtags from Republican Senators in 2018"
)

## The objects below keep the `q2b` prefix from an earlier question numbering.
# Timestamp, author and hashtags of every tweet from 2010 onwards.
tweet_q2b <- tweet %>%
  select(created_at, screen_name, hashtags)
tweet_q2b$created_at <- lubridate::ymd_hms(tweet_q2b$created_at)

# ymd_hms() returns UTC timestamps by default, so the cutoff is built in UTC
# too. Comparing against the bare string "2010-01-01" would parse it in the
# machine's local time zone and move the boundary by the local UTC offset.
tweet_q2b <- tweet_q2b %>%
  filter(created_at >= as.POSIXct("2010-01-01", tz = "UTC"))

# Hashtag vocabularies for the two positions. Matching below uses %in%, i.e.
# exact, case-sensitive comparison.
supports_gun_control <- c('guncontrol', 'gunsense', 'gunsafety', 'neveragain','marchforourlives','guncontrolnow','control','shooting','gunviolence','safe','gunlaws')
supports_gun_ownership <- c('2ndamendment', 'gunrights', 'nra', 'progun', 'bodyguard','NRA','liberals','people','GunOwners', 'Freedom','2A','DefendTheSecond')

# One row per (tweet, hashtag); flag which vocabulary the hashtag belongs to,
# keep only flagged rows, then attach the author's party.
# Note: the new logical columns reuse the names of the vocabulary vectors, so
# inside filter() the names refer to the columns.
tweet_q2b <- tweet_q2b %>%
  unnest(hashtags) %>%
  mutate(
    supports_gun_control = hashtags %in% supports_gun_control,
    supports_gun_ownership = hashtags %in% supports_gun_ownership
  ) %>%
  filter(supports_gun_control | supports_gun_ownership) %>%
  inner_join(sen_twt, by = c("screen_name" = "Official.Twitter")) %>%
  dplyr::select(-State, -Senator, -Staff.Twitter, -Campaign.Twitter, -label)
## Warning: Column `screen_name`/`Official.Twitter` joining character vector
## and factor, coercing into character vector
# Scatter of gun-control hashtag use over time, coloured by party. The y axis
# is the logical `supports_gun_control` flag of each (tweet, hashtag) row.
# NOTE(review): the subtitle names only five hashtags; confirm it still matches
# the vocabulary used to build the flag.
plot_q2b1 <- ggplot(tweet_q2b, aes(x = created_at, y = supports_gun_control)) +
  geom_point(aes(color = Party.affiliation)) +
  scale_color_manual(values = color) +
  ggtitle(
    "Senators' Tweets Support Gun Control, 2010-2018",
    subtitle = "Hashtags: #guncontrol, #gunsense, #gunsafety, #neveragain, #marchforourlives"
  ) +
  xlab("Year") +
  ylab("Support Gun Control") +
  theme_bw() +
  theme(legend.position = "right", legend.title = element_blank())

# Same plot for the gun-ownership flag. The y-axis label previously read
# "Owernship"; the typo is corrected here.
plot_q2b2 <- ggplot(tweet_q2b, aes(x = created_at, y = supports_gun_ownership)) +
  geom_point(aes(color = Party.affiliation)) +
  scale_color_manual(values = color) +
  ggtitle(
    "Senators' Tweets Support Gun Ownership, 2010-2018",
    subtitle = "Hashtags: #2ndamendment, #gunrights, #nra, #progun, #bodyguard"
  ) +
  xlab("Year") +
  ylab("Support Gun Ownership") +
  theme_bw() +
  theme(legend.position = "right", legend.title = element_blank())
# Word clouds of the gun-related hashtags, one per party.
# Each corpus is built straight from the party's `hashtags` column.
tweet_q2b_dem <- tweet_q2b %>%
  filter(Party.affiliation == "Democratic Party") %>%
  pull(hashtags) %>%
  VectorSource() %>%
  Corpus()
tweet_q2b_rep <- tweet_q2b %>%
  filter(Party.affiliation == "Republican Party") %>%
  pull(hashtags) %>%
  VectorSource() %>%
  Corpus()

# Democratic cloud and its title.
set.seed(1)
wordcloud(
  tweet_q2b_dem,
  min.freq = 1, max.words = 100, scale = c(3, 1),
  colors = "#2b8cbe"
)
text(
  x = 0.5, y = 1,
  labels = "Gun-related Hashtags from Democratic Senators' Tweets, 2010-2018"
)

# Republican cloud and its title.
set.seed(1)
wordcloud(
  tweet_q2b_rep,
  min.freq = 1, max.words = 100, scale = c(3, .5),
  colors = "#de2d26"
)
text(
  x = 0.5, y = 0.9,
  labels = "Gun-related Hashtags from Republican Senators' Tweets, 2010-2018"
)

# Render the two hashtag scatter plots.
plot_q2b1

plot_q2b2

# Gun-related hashtags used from 2018-02-14 onwards (the plot title refers to
# this period as "after Parkland Shooting").
tweet_q2d <- tweet %>%
  select(created_at, screen_name, hashtags)
tweet_q2d$created_at <- lubridate::ymd_hms(tweet_q2d$created_at)

# ymd_hms() returns UTC timestamps by default, so the cutoff is built in UTC
# too; a bare string would be parsed in the machine's local time zone.
# unnest() then gives one row per (tweet, hashtag).
tweet_q2d <- tweet_q2d %>%
  filter(created_at >= as.POSIXct("2018-02-14", tz = "UTC")) %>%
  unnest(hashtags)

# Keep only hashtags that appear in either gun-related vocabulary.
tweet_q2dhash <- tweet_q2d %>%
  filter(hashtags %in% supports_gun_control | hashtags %in% supports_gun_ownership) %>%
  select(hashtags)

# Pass the hashtag vector itself to VectorSource(), as the other word clouds in
# this script do. Handing it the one-column data frame makes tm iterate over
# the frame's columns rather than over the individual hashtags.
tweet_q2dcp <- Corpus(VectorSource(tweet_q2dhash$hashtags))

set.seed(3)
wordcloud(tweet_q2dcp, min.freq = 1, max.words = 100, colors = brewer.pal(8, "Greens"), scale = c(3, .5))
text(x = 0.5, y = 1, "Senators' Gun-Related Hashtags on Twitter after Parkland Shooting")

# Retweets only, with the fields needed to find whom each retweet mentions.
tweet_q3a <- tweet %>%
  filter(is_retweet == TRUE) %>%
  select(created_at, screen_name, text, mentions_screen_name)
#data$var <- str_extract(string = data$tweet, pattern = perl("(?<=RT @).*(?=:)"))
#str_extract(string = tweet_q3a$text, pattern = "(?<=@).*(?=:)")
#str_extract(string = tweet_q3a$text, pattern = perl("(?<=RT @).+(?=:)"))
#tweet_q3a$text1 <- str_split_fixed(tweet_q3a$text, ": ", 2)

# One row per (retweet, mentioned account).
tweet_q3a <- tweet_q3a %>%
  unnest(mentions_screen_name)

# Keep the first mention row of each retweet. A retweet is identified by
# timestamp, author and text together; grouping by `created_at` alone would
# merge retweets that different senators posted in the same second. The result
# is ungrouped so later steps do not inherit this grouping.
# NOTE(review): presumably the first mention is the retweeted account - confirm.
tweet_q3a <- tweet_q3a %>%
  arrange(created_at, screen_name, text) %>%
  group_by(created_at, screen_name, text) %>%
  slice(1) %>%
  ungroup()
# Official Twitter handles per party, as plain character vectors.
tweetq3a_dem <- sen_twt %>%
  filter(Party.affiliation == "Democratic Party") %>%
  select(Official.Twitter) %>%
  lapply(as.character) %>%
  unlist()
tweetq3a_rep <- sen_twt %>%
  filter(Party.affiliation == "Republican Party") %>%
  select(Official.Twitter) %>%
  lapply(as.character) %>%
  unlist()

# Flag whether the mentioned account is a Democratic / Republican senator.
tweet_q3a$rt_rep <- tweet_q3a$mentions_screen_name %in% tweetq3a_rep
tweet_q3a$rt_dem <- tweet_q3a$mentions_screen_name %in% tweetq3a_dem
# Count retweets of each party's senators per author and calendar year.
# The year of the parsed timestamp is the time bucket.
tweet_q3a$created_at <- lubridate::ymd_hms(tweet_q3a$created_at)
#setDT(tweet_q3a)
tweet_q3a$interval <- year(tweet_q3a$created_at)

# Within each (author, year) group, total the two party flags and store the
# totals on every row of the group. The data stays grouped by author and year.
tweet_q3a <- tweet_q3a %>%
  arrange(screen_name) %>%
  group_by(screen_name, interval) %>%
  mutate(
    rt_demsum = sum(rt_dem, na.rm = TRUE),
    rt_repsum = sum(rt_rep, na.rm = TRUE)
  )
# Attach the author's senator metadata, then drop the unused metadata columns
# together with the tweet text.
tweet_q3a <- tweet_q3a %>%
  inner_join(sen_twt, by = c("screen_name" = "Official.Twitter")) %>%
  select(-State, -Senator, -Staff.Twitter, -Campaign.Twitter, -label, -text)
## Warning: Column `screen_name`/`Official.Twitter` joining character vector
## and factor, coercing into character vector

# gap = retweets of the author's own party divided by retweets of the other
# party; authors of any other affiliation get NA.
tweet_q3a <- tweet_q3a %>%
  mutate(
    gap = ifelse(
      Party.affiliation == "Democratic Party",
      rt_demsum / rt_repsum,
      ifelse(
        Party.affiliation == "Republican Party",
        rt_repsum / rt_demsum,
        NA
      )
    )
  )

# ready for plotting: drop rows whose ratio is NA, NaN or infinite
tweet_q3a <- filter(tweet_q3a, is.finite(gap))

# One panel per party: the ratio over time with a straight-line fit.
plot_q3a <- ggplot(
  filter(tweet_q3a, Party.affiliation != "Independent"),
  aes(x = created_at, y = gap)
) +
  geom_point(aes(color = Party.affiliation)) +
  facet_wrap(~Party.affiliation) +
  scale_color_manual(values = color) +
  labs(
    title = "Which Party Is More Intolerant?",
    subtitle = "A Peek from Senator's Retweets:\nRt from the Own Party ÷ Rt from the Opposite Party",
    x = "Year",
    y = "Gap"
  ) +
  theme_bw() +
  theme(legend.position = "right", legend.title = element_blank()) +
  geom_smooth(method = lm, se = FALSE)
plot_q3a

# Mentions in original (non-retweet) tweets, restricted to mentions of
# Democratic or Republican senators: one row per (author, mentioned senator).
#
# The previous filter read `x == T %in% dem | x %in% rep == T`. Because %in%
# binds tighter than ==, the first half evaluated as `x == (T %in% dem)`, which
# is never TRUE for a handle, so every mention of a Democratic senator was
# dropped. Testing membership in the combined handle list gives the intended
# result.
tweet_q3b <- tweet %>%
  filter(is_retweet == FALSE) %>%
  select(screen_name, mentions_screen_name) %>%
  unnest(mentions_screen_name) %>%
  filter(mentions_screen_name %in% c(tweetq3a_dem, tweetq3a_rep))
# Graph object: undirected graph of author -> mentioned-senator pairs
# (spelled FALSE, since `F` can be reassigned).
graph_q3b <- graph_from_data_frame(d = tweet_q3b, directed = FALSE)

# Store each vertex's degree as its `size` attribute. centr_degree() is the
# current name of the deprecated centralization.degree(); `$res` holds the
# per-vertex scores.
V(graph_q3b)$size <- centr_degree(graph_q3b)$res

# Fix the RNG so the Fruchterman-Reingold layout is reproducible.
set.seed(1234)
q3b_df <- ggnetwork(graph_q3b, layout = "fruchtermanreingold", arrow.gap = 0, cell.jitter = 0)
## Warning in fortify.network(intergraph::asNetwork(model), ...): duplicated
## edges detected
# Pass the four layout coordinate columns through as.vector() in one step.
q3b_df[c("x", "y", "xend", "yend")] <-
  lapply(q3b_df[c("x", "y", "xend", "yend")], as.vector)

# Attach the senator metadata by Twitter handle, then drop the metadata
# columns that the plot does not use.
q3b_df <- inner_join(
  q3b_df,
  sen_twt,
  by = c("vertex.names" = "Official.Twitter")
)
q3b_df <- dplyr::select(
  q3b_df,
  -State, -Senator, -Staff.Twitter, -Campaign.Twitter, -label
)
## Warning: Column `vertex.names`/`Official.Twitter` joining factors with
## different levels, coercing to character vector
# Mention network: grey curved edges, nodes coloured by party and sized from
# the vertex `size` attribute.
plot_q3b <- ggplot() +
  geom_edges(
    data = q3b_df,
    aes(x = x, y = y, xend = xend, yend = yend),
    color = "#a5a29f", curvature = 0.1, size = 0.15, alpha = 1 / 2
  ) +
  # geom_nodes() only uses x/y; mapping xend/yend here merely produced an
  # "Ignoring unknown aesthetics" warning, so they are no longer mapped.
  geom_nodes(
    data = q3b_df,
    aes(x = x, y = y, color = Party.affiliation, size = sqrt(size) / pi),
    alpha = .8
  ) +
  # Label only vertices with size > 260. Columns are selected by name instead
  # of by position so the subset survives a change in column order.
  geom_label_repel(
    data = unique(q3b_df[q3b_df$size > 260, c("x", "y", "vertex.names")]),
    aes(x = x, y = y, label = vertex.names),
    size = 2, color = "#111111"
  ) +
  theme_blank() +
  scale_color_manual(values = color, name = "Party Affiliation") +
  ggtitle("Network of Senators on Twitter: Mentions") +
  labs(size = "Area of Centrality")
plot_q3b

library(httr)
##
## Attaching package: 'httr'
## The following object is masked from 'package:NLP':
##
## content
library(rtweet)
library(twitteR)
##
## Attaching package: 'twitteR'
## The following object is masked from 'package:rtweet':
##
## lookup_statuses
## The following objects are masked from 'package:dplyr':
##
## id, location
# Set the `httr_oauth_cache` option to TRUE (spelled out, since `T` is an
# ordinary variable that can be reassigned).
options(httr_oauth_cache = TRUE)
#req <- GET("https://api.twitter.com/1.1/statuses/home_timeline.json",
# config(token = twitter_token))
#tweetsq3c <- content(req)
# available data for first tweet on my timeline
#names(tweetsq3c[[1]])
#tweetsq3c[[1]]$user$name
#writeLines(tweetsq3c[[1]]$text)
### I didn't finish the bonus :p